Using the values generated from COVID-19 cases modeling predict the number of ICU/ventilator facilities required and predict the cost of the expansion based on some arbitrary value.
# Import library
library(ggplot2)
library(tidyr)
library(tseries)
## Warning: package 'tseries' was built under R version 4.0.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
covid = read.csv("COVID-19_Daily_Cases__Deaths__and_Hospitalizations.csv")
coluNames = c("Date","Daily Total Cases","Daily Total Death","Daily Hospitalized cases","Case 0-17","Case 18-29","Case 30-39","Case 40-49","Case 50-59","Case 60-69","Case 70-79","Case 80+","Case Age unknown")
covid_age = covid[,1:13]
colnames(covid_age) <- coluNames
covid_age$Date = as.Date(covid_age$Date,"%m/%d/%y")
covid_age <- covid_age[order(covid_age$Date, decreasing = FALSE),]
graph1 <- ggplot(covid_age, aes(x = Date, y = `Daily Total Cases`)) + geom_line()
graph1
## Warning: Removed 1 row(s) containing missing values (geom_path).
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
describe(covid_age)
## covid_age
##
## 13 Variables 190 Observations
## --------------------------------------------------------------------------------
## Date
## n missing distinct Info Mean Gmd .05
## 189 1 189 1 2020-06-03 63.33 2020-03-10
## .10 .25 .50 .75 .90 .95
## 2020-03-19 2020-04-17 2020-06-03 2020-07-20 2020-08-17 2020-08-26
##
## lowest : 2020-03-01 2020-03-02 2020-03-03 2020-03-04 2020-03-05
## highest: 2020-09-01 2020-09-02 2020-09-03 2020-09-04 2020-09-05
## --------------------------------------------------------------------------------
## Daily Total Cases
## n missing distinct Info Mean Gmd .05 .10
## 190 0 165 1 384.8 323.2 7.7 76.4
## .25 .50 .75 .90 .95
## 172.0 325.5 467.5 879.9 1040.2
##
## lowest : 0 1 3 5 11, highest: 1158 1173 1284 1323 1471
## --------------------------------------------------------------------------------
## Daily Total Death
## n missing distinct Info Mean Gmd .05 .10
## 190 0 52 0.996 15.23 17.17 0.00 0.90
## .25 .50 .75 .90 .95
## 2.00 7.00 25.75 40.10 45.55
##
## lowest : 0 1 2 3 4, highest: 51 52 53 56 57
## --------------------------------------------------------------------------------
## Daily Hospitalized cases
## n missing distinct Info Mean Gmd .05 .10
## 185 5 90 0.999 61.55 64.09 8.2 12.0
## .25 .50 .75 .90 .95
## 16.0 25.0 114.0 160.0 168.0
##
## lowest : 2 3 4 6 8, highest: 181 182 198 202 355
## --------------------------------------------------------------------------------
## Case 0-17
## n missing distinct Info Mean Gmd .05 .10
## 190 0 68 0.999 25.76 24.09 0.00 1.90
## .25 .50 .75 .90 .95
## 7.00 21.00 39.75 57.10 64.00
##
## lowest : 0 1 2 3 4, highest: 70 75 78 81 85
## --------------------------------------------------------------------------------
## Case 18-29
## n missing distinct Info Mean Gmd .05 .10
## 190 0 122 1 83.17 64.92 0.9 15.0
## .25 .50 .75 .90 .95
## 36.0 76.0 114.5 171.6 206.6
##
## lowest : 0 2 3 4 6, highest: 224 232 234 236 247
## --------------------------------------------------------------------------------
## Case 30-39
## n missing distinct Info Mean Gmd .05 .10
## 190 0 110 1 70.38 58.57 1.00 13.80
## .25 .50 .75 .90 .95
## 29.25 62.50 87.00 163.10 196.20
##
## lowest : 0 1 2 3 5, highest: 222 223 224 230 232
## --------------------------------------------------------------------------------
## Case 40-49
## n missing distinct Info Mean Gmd .05 .10
## 190 0 110 1 66.05 60.24 2.00 9.90
## .25 .50 .75 .90 .95
## 29.00 49.00 85.75 158.60 200.40
##
## lowest : 0 1 2 3 5, highest: 223 228 232 243 253
## --------------------------------------------------------------------------------
## Case 50-59
## n missing distinct Info Mean Gmd .05 .10
## 190 0 102 1 59.66 56.95 1.0 6.9
## .25 .50 .75 .90 .95
## 24.0 40.0 83.0 145.1 183.8
##
## lowest : 0 1 3 4 5, highest: 205 206 208 241 247
## --------------------------------------------------------------------------------
## Case 60-69
## n missing distinct Info Mean Gmd .05 .10
## 190 0 91 1 41.28 40.68 0.45 5.00
## .25 .50 .75 .90 .95
## 16.00 25.50 61.00 95.10 120.75
##
## lowest : 0 1 2 3 4, highest: 151 153 156 158 245
## --------------------------------------------------------------------------------
## Case 70-79
## n missing distinct Info Mean Gmd .05 .10
## 190 0 59 0.999 21.99 21.94 0.45 2.90
## .25 .50 .75 .90 .95
## 7.25 13.00 35.00 51.20 62.00
##
## lowest : 0 1 2 3 4, highest: 69 70 74 99 162
## --------------------------------------------------------------------------------
## Case 80+
## n missing distinct Info Mean Gmd .05 .10
## 190 0 51 0.997 16.44 19.45 0 1
## .25 .50 .75 .90 .95
## 4 8 24 41 52
##
## lowest : 0 1 2 3 4, highest: 68 90 94 104 167
## --------------------------------------------------------------------------------
## Case Age unknown
## n missing distinct Info Mean Gmd
## 190 0 3 0.191 0.07368 0.1387
##
## Value 0 1 2
## Frequency 177 12 1
## Proportion 0.932 0.063 0.005
## --------------------------------------------------------------------------------
for (i in coluNames){
titles = paste("Graph of", i, sep = " ")
plot(covid_age[,i], ylab = i, main = titles)
}
for (i in coluNames){
titles = paste("Graph of", i, sep = " ")
boxplot(covid_age[,i],ylab=i,main=titles)
}
# Based on Statistica statistics we can give a rough estimate of of how many people are admitted into hospital.
calcHospital <- function(percentage, name_Case, dataset){
dataset = dataset[name_Case]
newData = dataset*percentage
return(newData)
}
s = calcHospital(0.056, "Daily Total Cases", covid_age)
# Get like a weekly average setting the counter to 7
calcWeek <- function(name_case, dataset){
x = 0
y = 0
var = c()
z = 1
while (y != 190){
y = y+1
if (y%%7 == 0){
temp = sum(dataset[x:y,name_case])
var[z] = temp
z = z+1
x = y
}
}
return(var)
}
week <- calcWeek("Daily Total Cases",covid_age)
week
## [1] 7 119 874 2380 3164 4017 4727 7117 7883 6772 5953 5323 3455 1963 1617
## [16] 1407 1463 1541 1876 1938 2102 2276 2344 2453 2702 2572 1583
weekHos <- calcWeek("Daily Hospitalized cases",covid_age)
plot(week, type="l", ylab="Cases Per Week",xlab="Week",main = "Covid Cases by Weeks")
#lines(weekHos, type="l")
#plot(weekHos, type="l", ylab="Cases Per Week",xlab="Week",main = "Covid Cases Hospitalized by Weeks")
cumulcase <- cumsum(covid_age$`Daily Total Cases`)
plot(x= as.Date(covid_age$Date), cumulcase)
# COVID-19 rates of increase both in cases and hospitalization
calc_Rate <- function(name_case, dataset){
x = 1
y = 2
count = 1
set = c()
while (!(y > 190)){
temp = (dataset[y,name_case] - dataset[x,name_case])/dataset[x,name_case]
set[count] = temp
count = count + 1
x = x+1; y = y+1
}
return(set)
}
r = calc_Rate("Daily Total Cases",covid_age)
r[is.na(r)] <- 0
r[is.infinite(r)] <- 0
print(sum(r)/length(r))
## [1] 0.8713212
boxplot(r, main="Rate % of Growth from Previous day to Next Day", ylab = "Rate %", ylim=c(-3,3))
library(forecast)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Hmisc':
##
## src, summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Explanatory data analysis of data
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Trial run of forecasting (Start with train and test split )
data = subset(covid_age, select = c("Date", "Daily Total Cases"))
myts = ts(week, start=decimal_date(as.Date("2020-03-01")), frequency = 52)
plot(myts)
autoplot(myts)
# test = seq(from = as.Date(“2020-03-01”), to = as.Date(“2020-09-05”), by= 7)
# Conducting the Augmented Dickey-Fuller test for unit root
#detach("package:aTSA", unload=TRUE)
library(tseries)
print("Weekly timeseries data")
## [1] "Weekly timeseries data"
adf.test(myts,alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: myts
## Dickey-Fuller = -2.8588, Lag order = 2, p-value = 0.2446
## alternative hypothesis: stationary
print("Daily new cases timeseries data")
## [1] "Daily new cases timeseries data"
adf.test(covid_age$`Daily Total Cases`,alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: covid_age$`Daily Total Cases`
## Dickey-Fuller = -1.8822, Lag order = 5, p-value = 0.6255
## alternative hypothesis: stationary
acf(diff(myts))
acf(covid_age$`Daily Total Cases`)
# Forecasting of weekly
fit = auto.arima(week[1:24], seasonal=FALSE)
#plot(fit)
arimafor <- forecast(fit,h=3)
plot(arimafor)
lines(week)
print("Accuracy")
## [1] "Accuracy"
accuracy(arimafor,week[25:27])
## ME RMSE MAE MPE MAPE MASE
## Training set 30.89061 621.6047 412.4783 -524.77395 537.69277 0.6161191
## Test set -456.38457 756.5455 524.6341 -28.28803 30.81392 0.7836463
## ACF1
## Training set -0.04899643
## Test set NA
week[27]
## [1] 1583
range(arimafor$mean[3])
## [1] 2877.291 2877.291
mean(arimafor$mean)/7
## [1] 391.7216
# Forecasting of daily
fit2 = auto.arima(covid_age$`Daily Total Cases`[1:169], seasonal = FALSE)
arimafor2 <- forecast(fit2,h=21)
plot(arimafor2)
lines(covid_age$`Daily Total Cases`)
accuracy(arimafor2,covid_age$`Daily Total Cases`[170:190])
## ME RMSE MAE MPE MAPE MASE
## Training set 3.028713 136.9559 97.34124 -4.628462 34.51657 0.8468401
## Test set 8.218577 145.8876 129.25212 -1351.584221 1384.12906 1.1244553
## ACF1
## Training set -0.01091548
## Test set NA
covid_age$`Daily Total Cases`[190]
## [1] 129
arimafor2$mean[10]
## [1] 276.4378
mean(arimafor2$mean)
## [1] 288.6386
So the plan is to use a variety of methods to predict/forecast the number of COVID-19 cases and deaths and hospitalized cases.
Once we get the numbers, we will use that to calculate the difference between actual number and predicted/forecasted numbers to see how far off our predictions are.
# Forecasting of cumulative cases
dates = seq(as.Date("2020-03-01"), as.Date("2020-09-06"), by=1)
adf.test(cumulcase)
##
## Augmented Dickey-Fuller Test
##
## data: cumulcase
## Dickey-Fuller = -1.5571, Lag order = 5, p-value = 0.7615
## alternative hypothesis: stationary
chiArimaExp <- auto.arima(cumulcase[1:169], seasonal = FALSE)
chicaForecast <- forecast(chiArimaExp, h=21)
plot(chicaForecast)
lines(cumulcase)
accuracy(chicaForecast,cumulcase[170:190])
## ME RMSE MAE MPE MAPE MASE
## Training set 3.023006 136.9404 97.2924 2.3178667 2.6674173 0.2443838
## Test set 570.670226 628.7905 570.6702 0.8016514 0.8016514 1.4334375
## ACF1
## Training set -0.01075349
## Test set NA
#range(confirmed$Illinois[201:230])
#range(forecasted$mean)
range(cumulcase)
## [1] 0 73117
range(chicaForecast$mean)
## [1] 67180.90 72950.84
differ <- function(nums){
numdiff = c()
x = 1
y = 2
idx = 1
while(y<length(nums)){
temp = nums[y] - nums[x]
numdiff[idx] = temp
x= x+1
y = y+1
idx = idx+1
}
return(numdiff)
}
diffs = differ(chicaForecast$mean)
mean(diffs)
## [1] 289.1688
library(COVID19)
## Warning: package 'COVID19' was built under R version 4.0.3
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:Hmisc':
##
## subplot
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.0.3
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(forecast)
library(prophet)
## Loading required package: Rcpp
## Loading required package: rlang
library(deSolve)
## Warning: package 'deSolve' was built under R version 4.0.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x purrr::%@%() masks rlang::%@%()
## x lubridate::as.difftime() masks base::as.difftime()
## x purrr::as_function() masks rlang::as_function()
## x lubridate::date() masks base::date()
## x plotly::filter() masks dplyr::filter(), stats::filter()
## x purrr::flatten() masks rlang::flatten()
## x purrr::flatten_chr() masks rlang::flatten_chr()
## x purrr::flatten_dbl() masks rlang::flatten_dbl()
## x purrr::flatten_int() masks rlang::flatten_int()
## x purrr::flatten_lgl() masks rlang::flatten_lgl()
## x purrr::flatten_raw() masks rlang::flatten_raw()
## x lubridate::intersect() masks base::intersect()
## x purrr::invoke() masks rlang::invoke()
## x dplyr::lag() masks stats::lag()
## x purrr::list_along() masks rlang::list_along()
## x purrr::modify() masks rlang::modify()
## x purrr::prepend() masks rlang::prepend()
## x lubridate::setdiff() masks base::setdiff()
## x purrr::splice() masks rlang::splice()
## x dplyr::src() masks Hmisc::src()
## x dplyr::summarize() masks Hmisc::summarize()
## x lubridate::union() masks base::union()
#USA <- covid19(c("US"), level=2)
#write.csv(USA,"USA.CSV",row.names = FALSE)
#US = read.csv("USA.CSV")
# Only 2:21 and administrative_area_level_2 are important
# Read in the new data and subset based on categories
USA = read.csv("NewUSA.csv")
date = seq(as.Date("2020-03-16"), as.Date("2020-10-31"), by = "days")
columnanme = c("State","Value")
columnanme = c(columnanme,date)
columnanme[233] = "Grand Total"
tests = USA[USA$Value=="Sum of tests",]
tests = tests[,-2]
states = tests$State
tt = t(tests);tt = as.data.frame(tt)
index = seq(1,56,by=1)
colnames(tt) <- states
tt = tt[-1,]
tt = tt[-231,]
tt[states] = sapply(tt,as.numeric); tt = as.data.frame(tt)
tt["Date"] = date
row.names(tt) <- NULL
subsetdata <- function(data, name,columnVal){
name <- data[data$Value== columnVal,]
name = name [,-2]
stat = name$State
names = t(name); names= as.data.frame(names)
colnames(names) <- stat
names = names[-1,]
names = names[-231,]
names[stat] = sapply(names,as.numeric); names = as.data.frame(names)
#names["Date"] = date
row.names(names) = NULL
return(names)
}
test = subsetdata(USA, "tests","Sum of tests")
icu = subsetdata(USA, "icu","Sum of icu")
confirmed = subsetdata(USA, "confirmed","Sum of confirmed")
recovered = subsetdata(USA, "recovered","Sum of recovered")
hospitalized = subsetdata(USA, "hospitalized","Sum of hosp")
death = subsetdata(USA, "death","Sum of deaths")
venting = subsetdata(USA, "venting","Sum of vent")
# Ordering the subsets
order_date <- function(dataset){
newdata = dataset[,order(-dataset[230,])]
newdata["Date"] = date
return(newdata)
}
test = order_date(test)
icu = order_date(icu)
confirmed = order_date(confirmed)
recovered = order_date(recovered)
hospitalized = order_date(hospitalized)
death = order_date(death)
venting = order_date(venting)
# Top 10 states with highest cumulative cases to 31st October
top10case <- confirmed[,1:10]; top10case["Date"] = date
fig10 <- plot_ly(x=as.Date(top10case$Date))
names = colnames(top10case)
for(i in names[-11]){
fig10 <- add_trace(fig10,x=as.Date(top10case$Date), y=top10case[,i], mode="lines", name=i)
}
fig10
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
# Top 10 states with highest cumulative test done to 31st October
top10test <- test[,1:10]; top10test["Date"] = date
fig11 <- plot_ly(x=as.Date(top10test$Date))
names = colnames(top10test)
for(i in names[-11]){
fig11 <- add_trace(fig11,x=as.Date(top10test$Date), y=top10test[,i], mode="lines", name=i)
}
fig11
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
# Top 10 states with highest cumulative hospitalized cases to 31st October
top10hosp <- hospitalized[,1:10]; top10hosp["Date"] = date
fig12 <- plot_ly(x=as.Date(top10hosp$Date))
names = colnames(top10hosp)
for(i in names[-11]){
fig12 <- add_trace(fig12,x=as.Date(top10hosp$Date), y=top10hosp[,i], mode="lines", name=i)
}
fig12
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
# Top 10 states with highest ventilation to 31st October
top10vent <- venting[,1:10]; top10vent["Date"] = date
fig13 <- plot_ly(x=as.Date(top10vent$Date))
names = colnames(top10vent)
for(i in names[-11]){
fig13 <- add_trace(fig13,x=as.Date(top10vent$Date), y=top10vent[,i], mode="lines", name=i)
}
fig13
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
# Top 10 Death states to 31st October
top10death <- death[,1:10]; top10death["Date"] = date
fig14 <- plot_ly(x=as.Date(top10death$Date))
names = colnames(top10death)
for(i in names[-11]){
fig14 <- add_trace(fig14,x=as.Date(top10death$Date), y=top10death[,i], mode="lines", name=i)
}
fig14
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
# Forecasting of Illinois cumulative cases
acf_case = acf(diff(diff(confirmed$Illinois)))
adf.test(confirmed$Illinois)
## Warning in adf.test(confirmed$Illinois): p-value greater than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: confirmed$Illinois
## Dickey-Fuller = 1.2115, Lag order = 6, p-value = 0.99
## alternative hypothesis: stationary
p = ggplot(data = confirmed, aes(x=as.Date(Date),y=Illinois)) + geom_point()
p + geom_smooth(se=TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
arima_case = auto.arima(confirmed$Illinois[1:200])
forecasted = forecast(arima_case, h=30)
plot(forecasted)
lines(confirmed$Illinois, col='red')
accuracy(forecasted,confirmed$Illinois[201:230])
## ME RMSE MAE MPE MAPE MASE
## Training set 35.30461 493.3245 324.3018 0.6896976 0.9663941 0.2166832
## Test set 17651.08794 24651.7395 17659.9893 4.6794212 4.6823136 11.7995738
## ACF1
## Training set 0.07694218
## Test set NA
range(confirmed$Illinois[201:230])
## [1] 300385 416559
range(forecasted$mean)
## [1] 299919.9 357656.1
# SIR model of bad case scenario
iniv = c(S=1300,I=1,R=0)
parameters = c(gammaV=(1/14), beta=0.80)
time = seq(from=1,t=365,by=1)
SIR_model <- function(time, initial, parameter){
with(as.list(c(initial,parameter)),{
N = S+I+R
dS = -(beta*S*I)/N
dI = (beta*S*I)/N - (gammaV*I)
dR = (gammaV*I)
return(list(c(dS,dI,dR)))
}
)
}
output = ode(y=iniv,func=SIR_model,parms=parameters,times=time)
output = as.data.frame(output)
ggplot(data = output, aes(x=time)) + geom_line(aes(y=S),color="blue") + geom_line(aes(y=I),color="Red") + geom_line(aes(y=R),color ='green') + geom_hline(yintercept = 400) + labs(x='Days passed',y='Population')
# SIR model of good case scenario
iniv = c(S=1300,I=1,R=0)
parameters = c(gammaV=(1/14), beta=0.10)
time = seq(from=1,t=365,by=1)
SIR_model <- function(time, initial, parameter){
with(as.list(c(initial,parameter)),{
N = S+I+R
dS = -(beta*S*I)/N
dI = (beta*S*I)/N - (gammaV*I)
dR = (gammaV*I)
return(list(c(dS,dI,dR)))
}
)
}
output = ode(y=iniv,func=SIR_model,parms=parameters,times=time)
output = as.data.frame(output)
ggplot(data = output, aes(x=time)) + geom_line(aes(y=S),color="blue") + geom_line(aes(y=I),color="Red") + geom_line(aes(y=R),color ='green') + geom_hline(yintercept = 400) + labs(x='Days passed',y='Population')